package com.tzxx.spider.spiderbots;

import us.codecraft.webmagic.Page;
import com.tzxx.spider.domain.Article;
import com.tzxx.spider.domain.Template;
import com.tzxx.spider.service.ArticleService;
import com.tzxx.spider.spiderbots.base.BaseUrlDetailSpider;
import com.tzxx.spider.utils.HtmlUtils;
import com.tzxx.spider.utils.SpringContextUtil;

/**
 * Detail-page crawler.
 *
 * <p>For each processed page it reads the title, content and author using the
 * XPath selectors carried by the {@link Template}, records the page URL as the
 * link, and stores the resulting {@link Article} through {@link ArticleService}.
 */
public class DetailSpider extends BaseUrlDetailSpider {

    /**
     * Creates a detail spider bound to the given template.
     *
     * @param template template handed to the base spider; its title, content and
     *                 author selectors are read when a page is processed
     */
    public DetailSpider(Template template) {
        super(template);
    }

    /**
     * Builds an {@link Article} from the page and inserts it.
     *
     * <p>Title and author are passed through {@code HtmlUtils.html2Text}; the
     * content is stored exactly as the XPath selection returns it.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        // The service is looked up by bean name from the Spring context on every call.
        ArticleService service = SpringContextUtil.getBean("articleService");

        String titleText = HtmlUtils.html2Text(select(page, template.getTitleSelector()));
        String rawContent = select(page, template.getContentSelector());
        String authorText = HtmlUtils.html2Text(select(page, template.getAuthorSelector()));
        String pageUrl = page.getUrl().get();

        Article extracted = Article.builder()
                .title(titleText)
                .content(rawContent)
                .author(authorText)
                .link(pageUrl)
                .build();

        service.insert(extracted);
    }

    /** Evaluates the XPath expression against the page HTML and returns the selected value. */
    private String select(Page page, String xpathExpression) {
        // NOTE(review): presumably yields null when nothing matches — confirm against WebMagic.
        return page.getHtml().xpath(xpathExpression).get();
    }
}
